*************************************************************************************************************************************;
*This do-file estimates the initial conditions for the model (Table 4: distribution of grade-9 skills by neighborhood);
*************************************************************************************************************************************;

* Load the student-level analysis file. The path is quoted (as the outsheet
* call later in this file already does) so that a scratch directory whose
* name contains spaces still resolves.
use "$scratch\final_data_for_model.dta", clear


* Extract a single common factor from the five cognitive measures and score
* it with the Bartlett method, restricted to the estimation sample.
factor cogn1_tilde cogn2_tilde cogn3_tilde cogn4_tilde cogn5_tilde, factors(1)
predict skills_hat if e(sample), bartlett

* Standardize the score using the mean and standard deviation of the
* class==9 observations; the transformation is applied to every observation.
qui sum skills_hat if class==9
replace skills_hat = (skills_hat-r(mean))/r(sd)


* School-level mean and standard deviation of the standardized skill score,
* computed over class==9 students only (missing for all other rows).
egen mean_skills_school_9 = mean(skills_hat) if class==9, by(idschool)
egen sd_skills_school_9 = sd(skills_hat) if class==9, by(idschool)

* Constant that the collapse below counts to obtain the number of class==9
* rows per school.
gen n_kids = 1

* Reduce to one row per school, keeping only class==9 students in schools
* with school_size above 200. The two egen variables are constant within a
* school, so their mean simply carries the school value through.
collapse (mean) mean_skills_school_9 sd_skills_school_9 (count) n_kids if class==9 & school_size>200, by(idschool)

* Rank schools into quartiles of their mean skill. The full variable name is
* spelled out: the previous reference relied on variable abbreviation, which
* fails under "set varabbrev off" and becomes ambiguous as soon as another
* variable shares the prefix.
xtile quartiles_schools = mean_skills_school_9, nquantiles(4)

* Schools without a mean skill value receive no quartile; discard them.
drop if missing(quartiles_schools)

* Save the school-to-quartile crosswalk. This happens before the filter on
* n_kids further down, so the saved file contains every school that was
* assigned a quartile. The path is quoted so that a scratch directory whose
* name contains spaces still resolves.
preserve

keep quartiles_schools idschool

save "$scratch\schools_neighborhoods_id.dta", replace

* Return to the full school-level data for the remaining steps.
restore

* Retain only schools with more than 10 counted class==9 students.
keep if n_kids > 10

* Detailed summary statistics of each school-level measure within every
* quartile of schools.
foreach v of varlist mean_skills_school_9 sd_skills_school_9 n_kids {
    bysort quartiles_schools: summarize `v', detail
}


* Parallel lists: the k-th school-level source variable is averaged into the
* k-th neighborhood-level target variable.
local sources mean_skills_school_9 sd_skills_school_9 n_kids
local targets mean_neighborhood sd_neighborhood total_kids

* Create the empty result columns in the order they will be exported.
foreach t of local targets {
    generate `t' = .
}

* For each quartile q, store the across-school average of every source
* variable in observation q of the matching target variable. Rows 1 to 4
* act as a small results table; the source variables are never overwritten.
forvalues q = 1/4 {
    forvalues k = 1/3 {
        local src : word `k' of `sources'
        local dst : word `k' of `targets'

        quietly summarize `src' if quartiles_schools == `q'
        replace `dst' = r(mean) in `q'
    }
}

* The average number of students is reported as a whole number.
replace total_kids = round(total_kids)

* Keep only the four result rows and the three result columns.
keep in 1/4
keep mean_neighborhood sd_neighborhood total_kids

* Write the four neighborhood rows, without a header line, to the initial
* conditions text file.
outsheet using "$matlab_dir\initial_conditions.txt", nonames replace

* Flatten the four rows into one wide row (variable names suffixed 1 to 4)
* and display summary statistics of the result. ind_ is a constant that
* serves only as the reshape identifier and is dropped afterwards.
generate ind_ = 1
generate neighborhood = _n
reshape wide mean_neighborhood sd_neighborhood total_kids, i(ind_) j(neighborhood)
drop ind_
summarize
